6  BioMart

访问Ensembl ID

查找智人(Homo sapiens)基因组

https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#introduction

Code
# One-time setup: BiocManager::install("biomaRt")
library(biomaRt)

# Show the BioMart databases that Ensembl exposes.
listEnsembl()
#>         biomart                version
#> 1         genes      Ensembl Genes 115
#> 2 mouse_strains      Mouse strains 115
#> 3          snps  Ensembl Variation 115
#> 4    regulation Ensembl Regulation 115

# Connect to the Ensembl "genes" mart and select the human gene dataset.
ensembl <- useEnsembl(
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl"
)
ensembl
#> Object of class 'Mart':
#>   Using the ENSEMBL_MART_ENSEMBL BioMart database
#>   Using the hsapiens_gene_ensembl dataset


# Fetch gene-level annotation; no filters are supplied to getBM().
genes <- getBM(
  mart = ensembl,
  attributes = c(
    "ensembl_gene_id",
    "hgnc_symbol",
    "chromosome_name",
    "start_position",
    "end_position",
    "gene_biotype"
  )
)

# Display the result as an interactive table.
DT::datatable(genes)
Code
# Fetch transcript-level annotation (transcript ID, parent gene ID and
# transcript coordinates) from the mart held in `ensembl`.
transcripts <- getBM(
  mart = ensembl,
  attributes = c(
    "ensembl_transcript_id",
    "ensembl_gene_id",
    "transcript_start",
    "transcript_end"
  )
)

# Display the result as an interactive table.
DT::datatable(transcripts)

GRCh38(Genome Reference Consortium Human Build 38)

Code
# Human gene BioMart, reached through the Asia mirror.
# "genes" is the mart name that listEnsembl() reports; using it here keeps
# this call consistent with the other useEnsembl() calls in this file.
ensembl <- useEnsembl(
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl",
  mirror = "asia"
)

6.0.1 映射

Code
# Show the BioMart databases by their full registry names.
listMarts()
#>                biomart                version
#> 1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 115
#> 2   ENSEMBL_MART_MOUSE      Mouse strains 115
#> 3     ENSEMBL_MART_SNP  Ensembl Variation 115
#> 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 115

# Connect to the Ensembl BioMart and select the human gene dataset.
mart <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl"
)

# Browse the attributes that can be requested from this mart.
DT::datatable(listAttributes(mart))
Code




# Entrez Gene IDs to translate, given as character strings.
entrez_id <- c("1017", "1018", "1019")

# Map the Entrez Gene IDs to Ensembl gene IDs and gene names.
# `filters` names the field to filter on and `values` supplies the IDs.
getBM(
  attributes = c("entrezgene_id", "ensembl_gene_id", "external_gene_name"),
  filters = "entrezgene_id",
  values = entrez_id,
  mart = mart
)
#>   entrezgene_id ensembl_gene_id external_gene_name
#> 1          1017 ENSG00000123374               CDK2
#> 2          1018 ENSG00000250506               CDK3
#> 3          1019 ENSG00000135446               CDK4

6.0.2 使用 Ensembl 的存档版本

Code
# List the Ensembl archive releases (name, date, URL, version); the
# `current_release` column marks the current one with "*".
listEnsemblArchives()
#>              name     date                                 url version
#> 1  Ensembl GRCh37 Feb 2014          https://grch37.ensembl.org  GRCh37
#> 2     Ensembl 115 Sep 2025 https://sep2025.archive.ensembl.org     115
#> 3     Ensembl 114 May 2025 https://may2025.archive.ensembl.org     114
#> 4     Ensembl 113 Oct 2024 https://oct2024.archive.ensembl.org     113
#> 5     Ensembl 112 May 2024 https://may2024.archive.ensembl.org     112
#> 6     Ensembl 111 Jan 2024 https://jan2024.archive.ensembl.org     111
#> 7     Ensembl 110 Jul 2023 https://jul2023.archive.ensembl.org     110
#> 8     Ensembl 109 Feb 2023 https://feb2023.archive.ensembl.org     109
#> 9     Ensembl 108 Oct 2022 https://oct2022.archive.ensembl.org     108
#> 10    Ensembl 107 Jul 2022 https://jul2022.archive.ensembl.org     107
#> 11    Ensembl 106 Apr 2022 https://apr2022.archive.ensembl.org     106
#> 12    Ensembl 105 Dec 2021 https://dec2021.archive.ensembl.org     105
#> 13    Ensembl 104 May 2021 https://may2021.archive.ensembl.org     104
#> 14    Ensembl 103 Feb 2021 https://feb2021.archive.ensembl.org     103
#> 15    Ensembl 102 Nov 2020 https://nov2020.archive.ensembl.org     102
#> 16    Ensembl 101 Aug 2020 https://aug2020.archive.ensembl.org     101
#> 17    Ensembl 100 Apr 2020 https://apr2020.archive.ensembl.org     100
#> 18     Ensembl 80 May 2015 https://may2015.archive.ensembl.org      80
#> 19     Ensembl 77 Oct 2014 https://oct2014.archive.ensembl.org      77
#> 20     Ensembl 75 Feb 2014 https://feb2014.archive.ensembl.org      75
#> 21     Ensembl 54 May 2009 https://may2009.archive.ensembl.org      54
#>    current_release
#> 1                 
#> 2                *
#> 3                 
#> 4                 
#> 5                 
#> 6                 
#> 7                 
#> 8                 
#> 9                 
#> 10                
#> 11                
#> 12                
#> 13                
#> 14                
#> 15                
#> 16                
#> 17                
#> 18                
#> 19                
#> 20                
#> 21
# List the marts available in the archived Ensembl release 112.
listEnsembl(version = 112)
#>         biomart                version
#> 1         genes      Ensembl Genes 112
#> 2 mouse_strains      Mouse strains 112
#> 3          snps  Ensembl Variation 112
#> 4    regulation Ensembl Regulation 112

# Connect to the human gene dataset of archived release 112.
# `mirror` is deliberately not set: useEnsembl() cannot combine `mirror`
# with `version`; it warns and ignores the mirror when both are given.
ensembl_112 <- useEnsembl(
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl",
  version = 112
)

6.0.3 使用 Ensembl 基因组

Code
# Show the BioMart databases offered by Ensembl Genomes.
listEnsemblGenomes()
#>               biomart                        version
#> 1       protists_mart      Ensembl Protists Genes 62
#> 2 protists_variations Ensembl Protists Variations 62
#> 3          fungi_mart         Ensembl Fungi Genes 62
#> 4    fungi_variations    Ensembl Fungi Variations 62
#> 5        metazoa_mart       Ensembl Metazoa Genes 62
#> 6  metazoa_variations  Ensembl Metazoa Variations 62
#> 7         plants_mart        Ensembl Plants Genes 62
#> 8   plants_variations   Ensembl Plants Variations 62

# Connect to the plants mart without choosing a dataset yet.
ensembl_plants <- useEnsemblGenomes(
  biomart = "plants_mart"
)

# Look up the datasets whose entries match "Arabidopsis".
searchDatasets(mart = ensembl_plants, pattern = "Arabidopsis")
#>              dataset                         description version
#> 6   ahalleri_eg_gene Arabidopsis halleri genes (Ahal2.2) Ahal2.2
#> 10   alyrata_eg_gene    Arabidopsis lyrata genes (v.1.0)   v.1.0
#> 15 athaliana_eg_gene Arabidopsis thaliana genes (TAIR10)  TAIR10

# Connect to the plants mart again, this time selecting the
# Arabidopsis thaliana gene dataset.
ensembl_arabidopsis <- useEnsemblGenomes(
  biomart = "plants_mart",
  dataset = "athaliana_eg_gene"
)